{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "KaggleMC_HousePrices.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyNEMHaoBIzaAuhqIYdTmGkB",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/Divyanshu-ISM/Oil-and-Gas-data-analysis/blob/master/KaggleMC_HousePrices.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2y0t_2Hm1R72",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# RANDOM FOREST AND DECISION TREE MODEL - KAGGLE MICROCOURSE"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "O3txtEb32D9W",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import seaborn as sns\n",
        "import matplotlib.pyplot as plt\n",
        "%matplotlib inline"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kdWWjieE2a28",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "df = pd.read_csv('melb_data.csv')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Nco_AEGQ2hS2",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 333
        },
        "outputId": "f2024d9c-a8a6-4ce7-f6b0-752f0ffdb900"
      },
      "source": [
        "df.head()"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Suburb</th>\n",
              "      <th>Address</th>\n",
              "      <th>Rooms</th>\n",
              "      <th>Type</th>\n",
              "      <th>Price</th>\n",
              "      <th>Method</th>\n",
              "      <th>SellerG</th>\n",
              "      <th>Date</th>\n",
              "      <th>Distance</th>\n",
              "      <th>Postcode</th>\n",
              "      <th>Bedroom2</th>\n",
              "      <th>Bathroom</th>\n",
              "      <th>Car</th>\n",
              "      <th>Landsize</th>\n",
              "      <th>BuildingArea</th>\n",
              "      <th>YearBuilt</th>\n",
              "      <th>CouncilArea</th>\n",
              "      <th>Lattitude</th>\n",
              "      <th>Longtitude</th>\n",
              "      <th>Regionname</th>\n",
              "      <th>Propertycount</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Abbotsford</td>\n",
              "      <td>85 Turner St</td>\n",
              "      <td>2</td>\n",
              "      <td>h</td>\n",
              "      <td>1480000.0</td>\n",
              "      <td>S</td>\n",
              "      <td>Biggin</td>\n",
              "      <td>3/12/2016</td>\n",
              "      <td>2.5</td>\n",
              "      <td>3067.0</td>\n",
              "      <td>2.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>202.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Yarra</td>\n",
              "      <td>-37.7996</td>\n",
              "      <td>144.9984</td>\n",
              "      <td>Northern Metropolitan</td>\n",
              "      <td>4019.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Abbotsford</td>\n",
              "      <td>25 Bloomburg St</td>\n",
              "      <td>2</td>\n",
              "      <td>h</td>\n",
              "      <td>1035000.0</td>\n",
              "      <td>S</td>\n",
              "      <td>Biggin</td>\n",
              "      <td>4/02/2016</td>\n",
              "      <td>2.5</td>\n",
              "      <td>3067.0</td>\n",
              "      <td>2.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>0.0</td>\n",
              "      <td>156.0</td>\n",
              "      <td>79.0</td>\n",
              "      <td>1900.0</td>\n",
              "      <td>Yarra</td>\n",
              "      <td>-37.8079</td>\n",
              "      <td>144.9934</td>\n",
              "      <td>Northern Metropolitan</td>\n",
              "      <td>4019.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Abbotsford</td>\n",
              "      <td>5 Charles St</td>\n",
              "      <td>3</td>\n",
              "      <td>h</td>\n",
              "      <td>1465000.0</td>\n",
              "      <td>SP</td>\n",
              "      <td>Biggin</td>\n",
              "      <td>4/03/2017</td>\n",
              "      <td>2.5</td>\n",
              "      <td>3067.0</td>\n",
              "      <td>3.0</td>\n",
              "      <td>2.0</td>\n",
              "      <td>0.0</td>\n",
              "      <td>134.0</td>\n",
              "      <td>150.0</td>\n",
              "      <td>1900.0</td>\n",
              "      <td>Yarra</td>\n",
              "      <td>-37.8093</td>\n",
              "      <td>144.9944</td>\n",
              "      <td>Northern Metropolitan</td>\n",
              "      <td>4019.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Abbotsford</td>\n",
              "      <td>40 Federation La</td>\n",
              "      <td>3</td>\n",
              "      <td>h</td>\n",
              "      <td>850000.0</td>\n",
              "      <td>PI</td>\n",
              "      <td>Biggin</td>\n",
              "      <td>4/03/2017</td>\n",
              "      <td>2.5</td>\n",
              "      <td>3067.0</td>\n",
              "      <td>3.0</td>\n",
              "      <td>2.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>94.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Yarra</td>\n",
              "      <td>-37.7969</td>\n",
              "      <td>144.9969</td>\n",
              "      <td>Northern Metropolitan</td>\n",
              "      <td>4019.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Abbotsford</td>\n",
              "      <td>55a Park St</td>\n",
              "      <td>4</td>\n",
              "      <td>h</td>\n",
              "      <td>1600000.0</td>\n",
              "      <td>VB</td>\n",
              "      <td>Nelson</td>\n",
              "      <td>4/06/2016</td>\n",
              "      <td>2.5</td>\n",
              "      <td>3067.0</td>\n",
              "      <td>3.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>2.0</td>\n",
              "      <td>120.0</td>\n",
              "      <td>142.0</td>\n",
              "      <td>2014.0</td>\n",
              "      <td>Yarra</td>\n",
              "      <td>-37.8072</td>\n",
              "      <td>144.9941</td>\n",
              "      <td>Northern Metropolitan</td>\n",
              "      <td>4019.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "       Suburb           Address  ...             Regionname Propertycount\n",
              "0  Abbotsford      85 Turner St  ...  Northern Metropolitan        4019.0\n",
              "1  Abbotsford   25 Bloomburg St  ...  Northern Metropolitan        4019.0\n",
              "2  Abbotsford      5 Charles St  ...  Northern Metropolitan        4019.0\n",
              "3  Abbotsford  40 Federation La  ...  Northern Metropolitan        4019.0\n",
              "4  Abbotsford       55a Park St  ...  Northern Metropolitan        4019.0\n",
              "\n",
              "[5 rows x 21 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 5
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4SIb9XX-39tQ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 487
        },
        "outputId": "8d6faa88-649f-4bde-9f96-949169fb85b5"
      },
      "source": [
        "df.info()"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "<class 'pandas.core.frame.DataFrame'>\n",
            "RangeIndex: 13580 entries, 0 to 13579\n",
            "Data columns (total 21 columns):\n",
            " #   Column         Non-Null Count  Dtype  \n",
            "---  ------         --------------  -----  \n",
            " 0   Suburb         13580 non-null  object \n",
            " 1   Address        13580 non-null  object \n",
            " 2   Rooms          13580 non-null  int64  \n",
            " 3   Type           13580 non-null  object \n",
            " 4   Price          13580 non-null  float64\n",
            " 5   Method         13580 non-null  object \n",
            " 6   SellerG        13580 non-null  object \n",
            " 7   Date           13580 non-null  object \n",
            " 8   Distance       13580 non-null  float64\n",
            " 9   Postcode       13580 non-null  float64\n",
            " 10  Bedroom2       13580 non-null  float64\n",
            " 11  Bathroom       13580 non-null  float64\n",
            " 12  Car            13518 non-null  float64\n",
            " 13  Landsize       13580 non-null  float64\n",
            " 14  BuildingArea   7130 non-null   float64\n",
            " 15  YearBuilt      8205 non-null   float64\n",
            " 16  CouncilArea    12211 non-null  object \n",
            " 17  Lattitude      13580 non-null  float64\n",
            " 18  Longtitude     13580 non-null  float64\n",
            " 19  Regionname     13580 non-null  object \n",
            " 20  Propertycount  13580 non-null  float64\n",
            "dtypes: float64(12), int64(1), object(8)\n",
            "memory usage: 2.2+ MB\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mQburKeK4F6n",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 304
        },
        "outputId": "226802be-bdba-4037-ace4-c2b82036faca"
      },
      "source": [
        "df.describe()"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Rooms</th>\n",
              "      <th>Price</th>\n",
              "      <th>Distance</th>\n",
              "      <th>Postcode</th>\n",
              "      <th>Bedroom2</th>\n",
              "      <th>Bathroom</th>\n",
              "      <th>Car</th>\n",
              "      <th>Landsize</th>\n",
              "      <th>BuildingArea</th>\n",
              "      <th>YearBuilt</th>\n",
              "      <th>Lattitude</th>\n",
              "      <th>Longtitude</th>\n",
              "      <th>Propertycount</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>13580.000000</td>\n",
              "      <td>1.358000e+04</td>\n",
              "      <td>13580.000000</td>\n",
              "      <td>13580.000000</td>\n",
              "      <td>13580.000000</td>\n",
              "      <td>13580.000000</td>\n",
              "      <td>13518.000000</td>\n",
              "      <td>13580.000000</td>\n",
              "      <td>7130.000000</td>\n",
              "      <td>8205.000000</td>\n",
              "      <td>13580.000000</td>\n",
              "      <td>13580.000000</td>\n",
              "      <td>13580.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>2.937997</td>\n",
              "      <td>1.075684e+06</td>\n",
              "      <td>10.137776</td>\n",
              "      <td>3105.301915</td>\n",
              "      <td>2.914728</td>\n",
              "      <td>1.534242</td>\n",
              "      <td>1.610075</td>\n",
              "      <td>558.416127</td>\n",
              "      <td>151.967650</td>\n",
              "      <td>1964.684217</td>\n",
              "      <td>-37.809203</td>\n",
              "      <td>144.995216</td>\n",
              "      <td>7454.417378</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>0.955748</td>\n",
              "      <td>6.393107e+05</td>\n",
              "      <td>5.868725</td>\n",
              "      <td>90.676964</td>\n",
              "      <td>0.965921</td>\n",
              "      <td>0.691712</td>\n",
              "      <td>0.962634</td>\n",
              "      <td>3990.669241</td>\n",
              "      <td>541.014538</td>\n",
              "      <td>37.273762</td>\n",
              "      <td>0.079260</td>\n",
              "      <td>0.103916</td>\n",
              "      <td>4378.581772</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>1.000000</td>\n",
              "      <td>8.500000e+04</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>3000.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>1196.000000</td>\n",
              "      <td>-38.182550</td>\n",
              "      <td>144.431810</td>\n",
              "      <td>249.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>2.000000</td>\n",
              "      <td>6.500000e+05</td>\n",
              "      <td>6.100000</td>\n",
              "      <td>3044.000000</td>\n",
              "      <td>2.000000</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>177.000000</td>\n",
              "      <td>93.000000</td>\n",
              "      <td>1940.000000</td>\n",
              "      <td>-37.856822</td>\n",
              "      <td>144.929600</td>\n",
              "      <td>4380.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>3.000000</td>\n",
              "      <td>9.030000e+05</td>\n",
              "      <td>9.200000</td>\n",
              "      <td>3084.000000</td>\n",
              "      <td>3.000000</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>2.000000</td>\n",
              "      <td>440.000000</td>\n",
              "      <td>126.000000</td>\n",
              "      <td>1970.000000</td>\n",
              "      <td>-37.802355</td>\n",
              "      <td>145.000100</td>\n",
              "      <td>6555.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>3.000000</td>\n",
              "      <td>1.330000e+06</td>\n",
              "      <td>13.000000</td>\n",
              "      <td>3148.000000</td>\n",
              "      <td>3.000000</td>\n",
              "      <td>2.000000</td>\n",
              "      <td>2.000000</td>\n",
              "      <td>651.000000</td>\n",
              "      <td>174.000000</td>\n",
              "      <td>1999.000000</td>\n",
              "      <td>-37.756400</td>\n",
              "      <td>145.058305</td>\n",
              "      <td>10331.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>10.000000</td>\n",
              "      <td>9.000000e+06</td>\n",
              "      <td>48.100000</td>\n",
              "      <td>3977.000000</td>\n",
              "      <td>20.000000</td>\n",
              "      <td>8.000000</td>\n",
              "      <td>10.000000</td>\n",
              "      <td>433014.000000</td>\n",
              "      <td>44515.000000</td>\n",
              "      <td>2018.000000</td>\n",
              "      <td>-37.408530</td>\n",
              "      <td>145.526350</td>\n",
              "      <td>21650.000000</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "              Rooms         Price  ...    Longtitude  Propertycount\n",
              "count  13580.000000  1.358000e+04  ...  13580.000000   13580.000000\n",
              "mean       2.937997  1.075684e+06  ...    144.995216    7454.417378\n",
              "std        0.955748  6.393107e+05  ...      0.103916    4378.581772\n",
              "min        1.000000  8.500000e+04  ...    144.431810     249.000000\n",
              "25%        2.000000  6.500000e+05  ...    144.929600    4380.000000\n",
              "50%        3.000000  9.030000e+05  ...    145.000100    6555.000000\n",
              "75%        3.000000  1.330000e+06  ...    145.058305   10331.000000\n",
              "max       10.000000  9.000000e+06  ...    145.526350   21650.000000\n",
              "\n",
              "[8 rows x 13 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8-o5SEcS4U_o",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 101
        },
        "outputId": "e80dcf63-5fb1-4429-c76c-9a1b5485eedd"
      },
      "source": [
        "df.columns"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Index(['Suburb', 'Address', 'Rooms', 'Type', 'Price', 'Method', 'SellerG',\n",
              "       'Date', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car',\n",
              "       'Landsize', 'BuildingArea', 'YearBuilt', 'CouncilArea', 'Lattitude',\n",
              "       'Longtitude', 'Regionname', 'Propertycount'],\n",
              "      dtype='object')"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qKm3e9_h-oxY",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Column names (including the 'Lattitude'/'Longtitude' spellings) are exactly as listed by df.columns\n",
        "feature_cols = ['Rooms', 'Bathroom', 'Landsize', 'Lattitude', 'Longtitude']\n",
        "X = df[feature_cols]\n",
        "y = df[['Price']]  # double brackets: y is a one-column DataFrame, not a Series"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "lmhcxmfZ--PI",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from sklearn.tree import DecisionTreeRegressor"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "bFzOiRAl_GNc",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "melb_model = DecisionTreeRegressor(random_state=1)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "pZfEnvfc_TYQ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 118
        },
        "outputId": "fae6d6d1-6bad-4cd4-f5e5-7eeafaf2b671"
      },
      "source": [
        "melb_model.fit(X,y)"
      ],
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,\n",
              "                      max_features=None, max_leaf_nodes=None,\n",
              "                      min_impurity_decrease=0.0, min_impurity_split=None,\n",
              "                      min_samples_leaf=1, min_samples_split=2,\n",
              "                      min_weight_fraction_leaf=0.0, presort='deprecated',\n",
              "                      random_state=1, splitter='best')"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 14
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hW6Gvm5Y_cJS",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 134
        },
        "outputId": "cbec7205-6d80-41a7-fa7f-7fa4d54d0619"
      },
      "source": [
        "print(\"Making predictions for the following 5 houses:\")\n",
        "print(X.head())\n"
      ],
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Making predictions for the following 5 houses:\n",
            "   Rooms  Bathroom  Landsize  Lattitude  Longtitude\n",
            "0      2       1.0     202.0   -37.7996    144.9984\n",
            "1      2       1.0     156.0   -37.8079    144.9934\n",
            "2      3       2.0     134.0   -37.8093    144.9944\n",
            "3      3       2.0      94.0   -37.7969    144.9969\n",
            "4      4       1.0     120.0   -37.8072    144.9941\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Ah-b3VhS_yp3",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 50
        },
        "outputId": "22bba845-f341-4ac4-d57d-a47d5b3ae10d"
      },
      "source": [
        "print(\"The predictions are\")\n",
        "print(melb_model.predict(X.head()))"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "The predictions are\n",
            "[1480000. 1035000. 1465000.  850000. 1600000.]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ExPfckCH_z-D",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from sklearn.metrics import mean_absolute_error"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "H_n35kWfkACc",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from sklearn.model_selection import train_test_split"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HWTFMHNAkQqK",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZEdidI1GkjXV",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Now let's write a helper to find the optimal number of leaf nodes"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HFBy0dhek6mX",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def get_mae(nodes, X_test, y_test, X_train, y_train, random_state=1):\n",
        "    \"\"\"Fit a size-limited decision tree and return its mean absolute error.\n",
        "\n",
        "    Note the argument order: the held-out data comes BEFORE the training data.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    nodes : int\n",
        "        Forwarded to DecisionTreeRegressor as max_leaf_nodes.\n",
        "    X_test, y_test\n",
        "        Held-out features / target the error is measured on.\n",
        "    X_train, y_train\n",
        "        Features / target the tree is fitted on.\n",
        "    random_state : int or None, default 1\n",
        "        Seed forwarded to DecisionTreeRegressor so repeated runs give the\n",
        "        same score; pass None to leave the tree unseeded.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    The result of mean_absolute_error(y_test, predictions).\n",
        "    \"\"\"\n",
        "    tree = DecisionTreeRegressor(max_leaf_nodes=nodes, random_state=random_state)\n",
        "    tree.fit(X_train, y_train)\n",
        "    predictions = tree.predict(X_test)\n",
        "    return mean_absolute_error(y_test, predictions)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "CmgOiLf3lt_q",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Now let's find the optimal max_leaf_nodes, i.e. the cap on how many leaves (terminal nodes) the tree may have -- not its depth."
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "AhYehFD0l2QA",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Candidate values for max_leaf_nodes, from a very coarse tree to a very fine one\n",
        "leafs = [10,20,30,40,50,100,120,150,200,300,400,500,1000]\n",
        "# Validation MAE for each candidate, in the same order as `leafs`\n",
        "# (a comprehension keeps loop temporaries out of the notebook namespace)\n",
        "error = [get_mae(n, X_test, y_test, X_train, y_train) for n in leafs]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_FhkVgEkmR5n",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        },
        "outputId": "b826d4f2-b1ee-4759-d9d0-2813c126faeb"
      },
      "source": [
        "print(error)"
      ],
      "execution_count": 33,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[326997.712914613, 298333.1858883117, 281653.8106461913, 276092.3144024743, 271869.02401315205, 252313.0581927218, 249822.2545371738, 247247.83823948808, 237078.1365525934]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PSsra_3ymUFl",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 282
        },
        "outputId": "05e7feb1-8d81-4340-e980-a5ced089147c"
      },
      "source": [
        "# Validation error against tree size; the lowest point marks the best max_leaf_nodes tried\n",
        "fig, ax = plt.subplots()\n",
        "ax.plot(leafs, error, marker='o')\n",
        "ax.set_xlabel('max_leaf_nodes')\n",
        "ax.set_ylabel('Mean absolute error on the test split')\n",
        "ax.set_title('Decision tree MAE vs. number of leaf nodes');"
      ],
      "execution_count": 41,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[<matplotlib.lines.Line2D at 0x7fe852038240>]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 41
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD4CAYAAAAZ1BptAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAffElEQVR4nO3de3RV9Z338fc3d8gJkJAAIUSDBUrxwi1FrLXTalW0HfCZsa0+M5VxaH3W6DyjtuvpaLvWuHqZGTtPn1qdaR2d0YodV621TqXWlkG0U3vxEiwXuUkQkEuAAOESIECS7/PH/iWchJxcIMnJOefzWuss9vnu3znnt7M1n/x+e5+9zd0RERHpSlayOyAiIkOXQkJERBJSSIiISEIKCRERSUghISIiCeUkuwP9rbS01KuqqpLdDRGRlLJixYp97l7WuZ52IVFVVUVNTU2yuyEiklLMbFtXdU03iYhIQgoJERFJSCEhIiIJKSRERCQhhYSIiCSkkBARkYQUEiIikpBCIli+fg/f+1VtsrshIjKkKCSCX79Tz7/+anOyuyEiMqQoJIJYQQ6NJ5rRTZhERE5TSASx/FxaHZpOtSa7KyIiQ4ZCIojlZwNw5MSpJPdERGToUEgEsYLoWodHT7QkuSciIkOHQiKI5ecC0NjUnOSeiIgMHQqJoFDTTSIiZ1BIBEVhJKHpJhGR0xQSQdtIolEjCRGRdgqJoO3AdaNGEiIi7RQSQZEOXIuInKHHkDCzAjN7w8xWmdlaM/tqqD9lZhvN7G0ze9zMckPdzOwhM6s1s9VmNivuvRaa2abwWBhXn21ma8JrHjIzC/USM1sW2i8zs+L+/xFECnKzyDJNN4mIxOvNSOIEcKW7TwdmAPPMbC7wFDAVuBgYBnwutL8OmBwetwEPQ/QLH7gPuBSYA9wX90v/YeDzca+bF+r3AMvdfTKwPDwfEGZGLD9HB65FROL0GBIeaQxPc8PD3f3FsM6BN4AJoc0C4Mmw6jVglJmVA9cCy9z9gLs3AMuIAqccGOHur4X3ehK4Ie69FoflxXH1ARHLz+GIpptERNr16piEmWWb2UpgL9Ev+tfj1uUCnwV+GUoVwPa4l+8Ite7qO7qoA4x197qwvBsYm6B/t5lZjZnV1NfX92aTuhQryOHoCYWEiEibXoWEu7e4+wyi0cIcM7sobvX3gF+7+6sD0cG4PjjQ5SVa3f1Rd6929+qysrKz/oxYfnQlWBERifTp7CZ3Pwi8QjhmYGb3AWXAF+Ka7QQq455PCLXu6hO6qAPsCdNRhH/39qW/fVWYn8MRhYSISLvenN1UZmajwvIw4Gpgg5l9jug4w83uHn997SXALeEsp7nAoTBltBS4xsyKwwHra4ClYd1hM5sbzmq6BXg+7r3azoJaGFcfEEWabhIR6SCnF23KgcVmlk0UKs+4+wtm1gxsA34fzlh9zt2/BrwIXA/UAseAWwHc/YCZfR14M7zv19z9QFi+HXiC6CypX4QHwP3AM2a2KHzWp89hW3sUy8/R9yREROL0GBLuvhqY2UW9y9eGYwd3JFj3OPB4F/Ua4KIu6vuBq3rqY38pzNdIQkQknr5xHacoP4fGk820tuoWpiIioJDooDA/B3c4dkpfqBMRAYVEB6fvTqcpJxERUEh0EMuPQkLfuhYRiSgk4rSFhEYSIiIRhUSctpDQt65FRCIKiTiFmm4SEelAIRGnSAeuRUQ6UEjE0XSTiEhHCok4hQoJEZEOFBJx8nOyyM02hYSISKCQiGNmFOoifyIi7RQSncR0kT8RkXYKiU5iuvGQiEg7hUQnGkmIiJymkOgkVqD7XIuItFFIdKK704mInKaQ6CSWr5GEiEgbhUQnCgkRkdMUEp0U5udw7GQLLbqFqYiIQqKz9ov8ndRoQkREIdFJ+0X+dPBaREQh0Vmh7k4nItJOIdFJLEw36VvXIiIKiTPoPtciIq
cpJDrRMQkRkdMUEp20hYSmm0REFBJn0HSTiMhpColOCjXdJCLSTiHRSV5OFvk5WTTqy3QiIgqJruhKsCIiEYVEF3RPCRGRSI8hYWYFZvaGma0ys7Vm9tVQn2hmr5tZrZn9yMzyQj0/PK8N66vi3uveUN9oZtfG1eeFWq2Z3RNX7/IzBlphnu5OJyICvRtJnACudPfpwAxgnpnNBb4JPODuk4AGYFFovwhoCPUHQjvMbBpwE3AhMA/4npllm1k28F3gOmAacHNoSzefMaBiBTkc0XSTiEjPIeGRxvA0NzwcuBJ4NtQXAzeE5QXhOWH9VWZmof60u59w9y1ALTAnPGrd/V13Pwk8DSwIr0n0GQOqSPeUEBEBenlMIvzFvxLYCywDNgMH3b3tN+kOoCIsVwDbAcL6Q8Do+Hqn1ySqj+7mMzr37zYzqzGzmvr6+t5sUrcK8zXdJCICvQwJd29x9xnABKK//KcOaK/6yN0fdfdqd68uKys75/fTgWsRkUifzm5y94PAK8BlwCgzywmrJgA7w/JOoBIgrB8J7I+vd3pNovr+bj5jQOkWpiIikd6c3VRmZqPC8jDgamA9UVjcGJotBJ4Py0vCc8L6l93dQ/2mcPbTRGAy8AbwJjA5nMmUR3Rwe0l4TaLPGFAjh+XSdKqVplMtg/FxIiJDVk7PTSgHFoezkLKAZ9z9BTNbBzxtZt8A/gA8Fto/BvzAzGqBA0S/9HH3tWb2DLAOaAbucPcWADP7a2ApkA087u5rw3v9bYLPGFBlsXwA6o+coLJk+GB8pIjIkNRjSLj7amBmF/V3iY5PdK43AZ9K8F5/D/x9F/UXgRd7+xkDrbQo+jrGvkaFhIhkNn3jugtlsQIgGkmIiGQyhUQXTo8kTia5JyIiyaWQ6MLowuiYxL5GjSREJLMpJLqQl5PFqOG5mm4SkYynkEigNJavkYSIZDyFRAKlsTyNJEQk4ykkEigrKtBIQkQynkIigdJYns5uEpGMp5BIoDSWT+OJZo6f1KU5RCRzKSQSKCvSabAiIgqJBNqv36SQEJEMppBIoG0koTOcRCSTKSQSKI1puklERCGRwOhYdP0mjSREJJMpJBLIzc6ieHiuRhIiktEUEt0ojeWz74i+KyEimUsh0Y3SWL7ObhKRjKaQ6EZZkS7yJyKZTSHRjWi6SSEhIplLIdGN0qI8jp5s4djJ5mR3RUQkKRQS3Wj71rUOXotIplJIdKO07VvXjU1J7omISHIoJLrRfv0mjSREJEMpJLqhK8GKSKZTSHSjpFCX5hCRzKaQ6EZudhYlhXkaSYhIxlJI9CC6jalCQkQyk0KiB6WxfE03iUjGUkj0ILo0h85uEpHMpJDogUYSIpLJFBI9KCvK5/ipFg4dP5XsroiIDLoeQ8LMKs3sFTNbZ2ZrzezOUJ9hZq+Z2UozqzGzOaFuZvaQmdWa2WozmxX3XgvNbFN4LIyrzzazNeE1D5mZhXqJmS0L7ZeZWXH//wi6d0nFSABqth4Y7I8WEUm63owkmoEvuvs0YC5wh5lNA/4J+Kq7zwD+LjwHuA6YHB63AQ9D9AsfuA+4FJgD3Bf3S/9h4PNxr5sX6vcAy919MrA8PB9Us6uKKcjN4tVN+wb7o0VEkq7HkHD3Ond/KywfAdYDFYADI0KzkcCusLwAeNIjrwGjzKwcuBZY5u4H3L0BWAbMC+tGuPtr7u7Ak8ANce+1OCwvjqsPmvycbOZeMJpfb6of7I8WEUm6Ph2TMLMqYCbwOnAX8H/NbDvwLeDe0KwC2B73sh2h1l19Rxd1gLHuXheWdwNjE/TrtjDlVVNf3/+/zK+YXMa79UfZ0XCs399bRGQo63VImFkM+Alwl7sfBv4KuNvdK4G7gccGpouRMMrwBOsedfdqd68uKyvr98/+yORSAH6jKScRyTC9CgkzyyUKiKfc/blQXgi0Lf+Y6DgDwE6gMu7lE0Ktu/qELuoAe8J0FOHfvb3pb3+bNCbGuBEFOi4hIhmnN2
c3GdEoYb27fztu1S7gj8LylcCmsLwEuCWc5TQXOBSmjJYC15hZcThgfQ2wNKw7bGZzw2fdAjwf915tZ0EtjKsPKjPjisml/KZ2Hy2tXQ5mRETSUk4v2lwOfBZYY2YrQ+3LRGcjPWhmOUAT0ZlMAC8C1wO1wDHgVgB3P2BmXwfeDO2+5u5t55XeDjwBDAN+ER4A9wPPmNkiYBvw6bPYxn5xxZQyfrxiB2t2HmJG5ahkdUNEZFD1GBLu/hvAEqye3UV7B+5I8F6PA493Ua8BLuqivh+4qqc+DoYPTyrFDH79Tr1CQkQyhr5x3UslhXlcNH4kr+pUWBHJIAqJPvjIlFLeeu8gh47pEh0ikhkUEn1w/cXltLQ63//dlmR3RURkUCgk+uDC8SO59sKxPPbqFg4e0+XDRST9KST66O6rp9B4splHf/1usrsiIjLgFBJ9NHXcCD5xcTlP/G4r+3VbUxFJcwqJs3DXx6fQdKqFf/3vzcnuiojIgFJInIVJY2LcMLOCJ3+/TXetE5G0ppA4S7d/9H2caG5lyapdPTcWEUlRComzNGlMEReOH6GQEJG0ppA4BwtmjGfV9oNs3Xc02V0RERkQColz8MlLxgPwM40mRCRNKSTOwfhRw5hTVcLzq3YRXddQRCS9KCTO0fwZ46nd28j6uiPJ7oqISL9TSJyj6y8uJyfLdABbRNKSQuIclRTmccXkUn62ahetumudiKQZhUQ/mD9jPDsPHuet9xqS3RURkX6lkOgHV08bR35OFs+v1JSTiKQXhUQ/iOXn8PFpY3lxTR3NLa3J7o6ISL9RSPST+dPHs//oSX67eX+yuyIi0m8UEv3ko+8vo6ggh+dX7kx2V0RE+o1Cop/k52Rz3UXj+K+1e2g61ZLs7oiI9AuFRD+aP72CxhPNvLxhb7K7IiLSLxQS/eiy942mNJbPEp3lJCJpQiHRj7KzjE9eUs7LG/dyuOlUsrsjInLOFBL9bP6M8ZxsbmXp27uT3RURkXOmkOhnMytHUVkyTNdyEpG0oJDoZ2bG/Onj+W3tPt3/WkRSnkJiACyYUUGrw4tr6pLdFRGRc6KQGABTxhYxdVyRppxEJOUpJAbIH08fz4ptDWw/cCzZXREROWs9hoSZVZrZK2a2zszWmtmdcev+t5ltCPV/iqvfa2a1ZrbRzK6Nq88LtVozuyeuPtHMXg/1H5lZXqjnh+e1YX1Vf234QJs/Pdz/erVGEyKSunozkmgGvuju04C5wB1mNs3MPgYsAKa7+4XAtwDMbBpwE3AhMA/4npllm1k28F3gOmAacHNoC/BN4AF3nwQ0AItCfRHQEOoPhHYpobJkOLPOG6Uv1olISusxJNy9zt3fCstHgPVABfBXwP3ufiKsa7sWxQLgaXc/4e5bgFpgTnjUuvu77n4SeBpYYGYGXAk8G16/GLgh7r0Wh+VngatC+5SwYEYFG3YfYeNu3f9aRFJTn45JhOmemcDrwBTgijAN9N9m9sHQrALYHveyHaGWqD4aOOjuzZ3qHd4rrD8U2nfu121mVmNmNfX19X3ZpAF1/cXlZBksWaUrw4pIaup1SJhZDPgJcJe7HwZygBKiKaj/AzyTrL/y3f1Rd6929+qysrJkdKFLZUX5XD6plJ+tqsNd978WkdTTq5Aws1yigHjK3Z8L5R3Acx55A2gFSoGdQGXcyyeEWqL6fmCUmeV0qhP/mrB+ZGifMuZPH897B46xcvvBZHdFRKTPenN2kwGPAevd/dtxq34KfCy0mQLkAfuAJcBN4cykicBk4A3gTWByOJMpj+jg9hKP/sR+BbgxvO9C4PmwvCQ8J6x/2VPsT/JrLxpHnu5/LSIpqjcjicuBzwJXmtnK8LgeeBy4wMzeJjoIvTCMKtYCzwDrgF8Cd7h7Szim8NfAUqKD38+EtgB/C3zBzGqJjjk8FuqPAaND/QtA+2mzqWJEQS5Xvn8ML6yuo6U1pfJNRARLsT/Me1RdXe01NTXJ7kYHL66p4/an3u
I/Fl3KhyeXJrs7IiJnMLMV7l7dua5vXA+CK6eOIZafo7OcRCTlKCQGQUFuNtdcOJZfvL2bE826/7WIpA6FxCCZP308R5qa+dXGofM9DhGRnigkBsnlk0oZXZinK8OKSEpRSAyS3Owsrr+4nJfW7aHxRHPPLxARGQIUEoNo/ozxnGhuZdk63f9aRFKDQmIQzT6vmIpRw3RlWBFJGQqJQZSVZXxyejmvbtrHgaMnk90dEZEeKSQG2YLpFTS3uu5/LSIpQSExyD5QXsSkMTFNOYlISlBIDDIzY/708byx9QC7Dh5PdndERLqlkEiCtvtf3/2jlTz40iZeWL2L9XWHaTqlb2OLyNCS03MT6W9VpYXc/tH3sWTVLr6z/B3arrFoBhOKh7FgegVfuHoKWVkpc6dWEUlTugpskh0/2cKWfUfZXN/I5vpGVm0/yCsb6/nU7Anc/6eXkK2gEJFBkOgqsBpJJNmwvGymjR/BtPEjAHB3vvPSJh5cvonjp1p44DMzyM3WrKCIJIdCYogxM+6+egrD87L5x19soOlUC//yP2dRkJud7K6JSAbSn6hD1P/6o/fx9QUX8tL6vXxucQ3HTup6TyIy+BQSQ9hnL6viW5+azu827+OWx97gcNOpZHdJRDKMQmKIu3H2BP755lms3H6QP/u312nQ5TxEZBApJFLAJy4p59FbZrNxzxFuevQ19h5pSnaXRCRDKCRSxJVTx/LEX3yQ7Q3H+Mwjr7FT39YWkUGgkEghH5pUyg8WzWFf4wk+88jv2XtYIwoRGVgKiRQz+/wS/mPRpexvPMkinfUkIgNMIZGCpleO4p9vnsnaXYf4mx+upKU1vb41LyJDh0IiRX182lju++MLeWn9Hr7x83XJ7o6IpCl94zqFLfxQFdv2H+Px327hvJLh3Hr5xGR3SUTSjEIixX3lEx9gR8MxvvbCOipGDeOaC8clu0sikkY03ZTisrOMB2+aySUVI7nz6ZWs3nEw2V0SkTSikEgDw/Ky+feFH2R0LI+/fKKGHQ3Hkt0lEUkTCok0UVaUz/f/4oOcaG7h1u+/yaHjus6TiJw7hUQamTy2iEf+fDZb9x/l9qdWcLK5NdldEpEUp5BIMx+aVMo//skl/LZ2P1/5zzWk250HRWRw9RgSZlZpZq+Y2TozW2tmd3Za/0UzczMrDc/NzB4ys1ozW21ms+LaLjSzTeGxMK4+28zWhNc8ZGYW6iVmtiy0X2Zmxf236enrxtkTuPOqyfx4xQ7+5eXaZHdHRFJYb0YSzcAX3X0aMBe4w8ymQRQgwDXAe3HtrwMmh8dtwMOhbQlwH3ApMAe4L+6X/sPA5+NeNy/U7wGWu/tkYHl4Lr1w18cn8yczK/h/y97hp3/YmezuiEiK6jEk3L3O3d8Ky0eA9UBFWP0A8CUgfk5jAfCkR14DRplZOXAtsMzdD7h7A7AMmBfWjXD31zyaG3kSuCHuvRaH5cVxdemBmXH/n17C3AtK+NKzq3n93f3J7pKIpKA+HZMwsypgJvC6mS0Adrr7qk7NKoDtcc93hFp39R1d1AHGuntdWN4NjE3Qr9vMrMbMaurr6/uySWktLyeLR/68msqSYdz2gxVsrm9MdpdEJMX0OiTMLAb8BLiLaArqy8DfDVC/zhBGGV0ehXX3R9292t2ry8rKBqtLKWHk8FyeuHUOudnGrd9/k/2NJ5LdJRFJIb0KCTPLJQqIp9z9OeB9wERglZltBSYAb5nZOGAnUBn38gmh1l19Qhd1gD1hOorw796+bJxEKkuG82+3VLPncBOfe7KGplMtye6SiKSI3pzdZMBjwHp3/zaAu69x9zHuXuXuVURTRLPcfTewBLglnOU0FzgUpoyWAteYWXE4YH0NsDSsO2xmc8Nn3QI8Hz5+CdB2FtTCuLr00czzinnwphms3H6QLzyzUvehEJFe6c0F/i4HPgusMbOVofZld38xQfsXgeuBWuAYcCuAux8ws68Db4Z2X3P3A2H5duAJYBjwi/
AAuB94xswWAduAT/dyu6QL8y4q5yvXf4Bv/Hw9/7V2D9PGj2D2+cV8sKqE6vOLGTOiINldFJEhxtLty1bV1dVeU1OT7G4MWe7O7zfv57eb91GztYFVOw7SdCr6ZnZlyTCqzy9h9vnFVFcVM2VMEVlZluQei8hgMLMV7l7dua5LhWcYM+NDk0r50KRSAE42t7Ku7jA1Ww9Qs7WBVzft4z/D9yqKCnKYdV4x1ecXM7uqmBmVoxiep/9kRDKJRhLSgbvz3oFj1GxtoGZbAyu2HeCdPdGpszlZpikqkTSVaCShkJAeHTp2irfea6Bm2wFNUYmkKU03yVkbOTyXj00dw8emjgE6TlGt2NbAb2o1RSWSrjSSkHPW2ymqthHH2BH5hGs4isgQoekmGVTdTVGVFOYxdVwRU8eNYGp5EVPHFTFlbBEFudlJ7rVI5tJ0kwyqRFNUK99rYOOeI6yvO8IP33iP4+Hb31kGVaWFfGDciChAyqN/JxQP06hDJIkUEjIo8nKymFE5ihmVo9prLa3RNNXG3YdZX3eEDbsP8/auQ/x8TV17m6L8HN4/riiMOEbwgfJo1FFUkJuMzRDJOJpukiGn8UQz7+w5woYQHBvqjrB+92GONJ2+lEhlybAoNOJGHeePLiRbZ1aJnBVNN0nKiOVHZ0jNOu/0jQjdnV2HmthQd5gNu4+wPvy7fP0eWsPfOQW5Wbx/bPyxjig8igvzkrQlIqlPIwlJaU2nWqjd29geGhvC1NWBoyfb24wbUcDUME01sbSQiaWFXFBaSFmRzrISaaORhKSlgtxsLqoYyUUVI9tr7k5944kO01Xr6g7zu837Odnc2t6uMC+bqrjQmFhWSNXoQi4ojTFyuI55iIBCQtKQmTGmqIAxRQV8ZMrpm1C1tDp1h46zZd9Rtuw7yrv10b9rdh7ixTV17dNWEJ2m2zbqiH9UjS5kWJ5O1ZXMoZCQjJGdZUwoHs6E4uFcMbnjHQxPNrfy3oFjIUAa24Pk1U31PLtiR4e240cWtI86JpYWckFZIRNLY0woHkZudp/uCCwy5CkkRIhO0Z00JsakMTE630q98UQzW0NotP377r6j/GzVLg7HnXGVk2WcVzK8fQqrbRqrqrSQooLT/6vFHwex9hpxtbj1XRwy6amtdWh75md1aKtjMtIDhYRID2L5OWcc94Do2EfDsVPto462Eci79Uf53eZ97d8wT0VnFTh0SK8zFnsThKfbdv1Z9NSvs+g3Pfbl7PrdodvW8d++vldvfwb/8D8uZs7Eki77cLYUEiJnycwoKcyjpDCP2ecXd1jX2ursOdLElvqjbNl/lOMno2+Wx59M6HgXtbjlHtrSoe25vVeHt217rx5eH1/vsW3Cfg3Az6Af+k1XP6MB/hnE1xP/PM7cxvgnhfn9f7xMISEyALKyjPKRwygfOaz9Bk8iqUhH2UREJCGFhIiIJKSQEBGRhBQSIiKSkEJCREQSUkiIiEhCCgkREUlIISEiIgml3f0kzKwe2NaHl5QC+waoO0NZJm53Jm4zZOZ2Z+I2w7lt9/nuXta5mHYh0VdmVtPVjTbSXSZudyZuM2TmdmfiNsPAbLemm0REJCGFhIiIJKSQgEeT3YEkycTtzsRthszc7kzcZhiA7c74YxIiIpKYRhIiIpKQQkJERBLK6JAws3lmttHMas3snmT3p7+YWaWZvWJm68xsrZndGeolZrbMzDaFf4tD3czsofBzWG1ms5K7BWfPzLLN7A9m9kJ4PtHMXg/b9iMzywv1/PC8NqyvSma/z4WZjTKzZ81sg5mtN7PL0n1fm9nd4b/tt83sh2ZWkI772sweN7O9ZvZ2XK3P+9bMFob2m8xsYV/6kLEhYWbZwHeB64BpwM1mNi25veo3zcAX3X0aMBe4I2zbPcByd58MLA/PIfoZTA6P24CHB7/L/eZOYH3c828CD7j7JKABWBTqi4CGUH8gtEtVDwK/dPepwHSi7U/bfW1mFcDfANXufhGQDdxEeu
7rJ4B5nWp92rdmVgLcB1wKzAHuawuWXnH3jHwAlwFL457fC9yb7H4N0LY+D1wNbATKQ60c2BiWHwFujmvf3i6VHsCE8D/NlcALRPeK3wfkdN7nwFLgsrCcE9pZsrfhLLZ5JLClc9/TeV8DFcB2oCTsuxeAa9N1XwNVwNtnu2+Bm4FH4uod2vX0yNiRBKf/Q2uzI9TSShhazwReB8a6e11YtRsYG5bT5WfxHeBLQGt4Pho46O7N4Xn8drVvc1h/KLRPNROBeuD7YZrt382skDTe1+6+E/gW8B5QR7TvVpD++7pNX/ftOe3zTA6JtGdmMeAnwF3ufjh+nUd/UqTN+c9m9klgr7uvSHZfBlkOMAt42N1nAkc5Pf0ApOW+LgYWEAXkeKCQM6dkMsJg7NtMDomdQGXc8wmhlhbMLJcoIJ5y9+dCeY+ZlYf15cDeUE+Hn8XlwHwz2wo8TTTl9CAwysxyQpv47Wrf5rB+JLB/MDvcT3YAO9z99fD8WaLQSOd9/XFgi7vXu/sp4Dmi/Z/u+7pNX/ftOe3zTA6JN4HJ4YyIPKIDX0uS3Kd+YWYGPAasd/dvx61aArSd2bCQ6FhFW/2WcHbEXOBQ3HA2Jbj7ve4+wd2riPbly+7+Z8ArwI2hWedtbvtZ3Bjap9xf2+6+G9huZu8PpauAdaTxviaaZpprZsPDf+tt25zW+zpOX/ftUuAaMysOo7BrQq13kn1QJskHhK4H3gE2A19Jdn/6cbs+TDQEXQ2sDI/rieZhlwObgJeAktDeiM702gysITprJOnbcQ7b/1HghbB8AfAGUAv8GMgP9YLwvDasvyDZ/T6H7Z0B1IT9/VOgON33NfBVYAPwNvADID8d9zXwQ6LjLqeIRo2LzmbfAn8Ztr8WuLUvfdBlOUREJKFMnm4SEZEeKCRERCQhhYSIiCSkkBARkYQUEiIikpBCQkREElJIiIhIQv8fLtXgfwNA5z0AAAAASUVORK5CYII=\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "V08NtNRImark",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# 450 something is the optimum number of leave nodes. \n",
        "#Now this is validated, and hence you can apply this to the whole data set. \n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gvqwW0DMpGxG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}